import requests
import re
import os

# Fetch a Baidu News search-results page, pull out each headline's title,
# link, date and source with regular expressions, and write them to
# news_info.txt (also echoing every entry to stdout).

# Desktop Chrome User-Agent string sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}
url = 'http://www.baidu.com/s?tn=news&rtt=1&bsst=1&cl=2&wd=财经'

# Seconds to wait for the server; without a timeout requests.get() can
# block forever on an unresponsive connection.
REQUEST_TIMEOUT = 10

# The output file is opened with a relative path, so show the directory it
# will be created in.
print(os.getcwd())

try:
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()  # turn HTTP 4xx/5xx responses into RequestException
    html_content = res.text

    # Titles: capture the anchor's inner HTML, then drop any nested tags.
    # re.S lets '.' span newlines inside the <h3> element.
    p_title = r'<h3 class="news-title_1YtI1 ">.*?>(.*?)</a>'
    title = [
        re.sub(r'<.*?>', '', raw_title)
        for raw_title in re.findall(p_title, html_content, re.S)
    ]

    p_href = r'<h3 class="news-title_1YtI1 "><a href="(.*?)"'
    href = re.findall(p_href, html_content, re.S)

    p_date = r'<span class="c-color-gray2 c-font-normal c-gap-right-xsmall" .*?>(.*?)</span>'
    date = re.findall(p_date, html_content)

    p_source = r'<span class="c-color-gray" .*?>(.*?)</span>'
    source = re.findall(p_source, html_content)

    # NOTE(review): the four lists are matched independently and paired by
    # position, so a result missing one field would shift later entries --
    # confirm against the live page markup.
    with open('news_info.txt', 'w', encoding='utf-8') as file:
        # zip() stops at the shortest list, so a shorter `href` list can no
        # longer raise IndexError part-way through writing the file.
        entries = zip(title, source, date, href)
        for number, (headline, origin, published, link) in enumerate(entries, start=1):
            line = f"{number}.{headline}({origin} {published})\n{link}\n"
            file.write(line)
            print(line.strip())
except requests.RequestException as e:
    print(f"请求出错: {e}")
except Exception as e:
    # Top-level boundary of the script: report anything unexpected instead
    # of ending with a traceback.
    print(f"发生未知错误: {e}")
